# Notebook setup.
# Load IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to the package are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation/convergence
# warnings that may matter; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
# Run the whole workflow through the package's single entry point.
# Per the recorded output that follows, this covers rough feature selection,
# grid-search training of the Logistic / RF / XGB models, re-training and
# saving of the final models, and the GINI and churn-rate summaries.
from churnfli.main import main
# main() returns four objects, unpacked here in order.
GINIs, sensi_fig, churn_cat, churn_num_fig = main()
# Show the two returned figures (presumably Plotly figures — confirm in main()).
sensi_fig.show()
churn_num_fig.show()
##################################################### Perform rough feature selection with a RF model... ##################################################### Mean accuracy on the train data is 0.7459555555555556 These trival features will be excluded from modelling: ['paperlessbilling', 'multiplelines', 'seniorcitizen', 'dependents', 'partner', 'phoneservice', 'gender'] ############################################################## ###################### Start to train Logistic models... ############################################################## Performing grid search... Fitting 5 folds for each of 108 candidates, totalling 540 fits Saving model: ./models/v3_Logistic ############################################################## ###################### Start to train RF models... ############################################################## Performing grid search... Fitting 5 folds for each of 27 candidates, totalling 135 fits Saving model: ./models/v3_RF ############################################################## ###################### Start to train XGB models... ############################################################## Performing grid search... Fitting 5 folds for each of 1 candidates, totalling 5 fits [11:48:35] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior. Saving model: ./models/v3_XGB Loading model: models/v3_Logistic Saving model: ./models/final/v3_Logistic_final Loading model: models/v3_RF Saving model: ./models/final/v3_RF_final Loading model: models/v3_XGB [11:48:36] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior. 
Saving model: ./models/final/v3_XGB_final ##################################################### GINI on the train and test data for the re-trained best models are: #####################################################
| | train | test |
|---|---|---|
| v3_Logistic_final | 0.677458 | 0.711522 |
| v3_RF_final | 0.700301 | 0.708517 |
| v3_XGB_final | 0.686274 | 0.703473 |
Loading model: models/final/v3_Logistic_final Loading model: models/final/v3_RF_final Loading model: models/final/v3_XGB_final Loading model: models/final/v3_RF_final Feature importance and churn rates for categorical features:
| | feature importance | value | churn rate (%) |
|---|---|---|---|
| contract | 0.299214 | Month-to-month | 42.7 |
| contract | 0.299214 | One year | 11.1 |
| contract | 0.299214 | Two year | 3.3 |
| onlinesecurity | 0.146574 | No | 41.7 |
| onlinesecurity | 0.146574 | Yes | 14.5 |
| onlinesecurity | 0.146574 | No internet service | 7.8 |
| techsupport | 0.106391 | No | 41.4 |
| techsupport | 0.106391 | Yes | 15.5 |
| techsupport | 0.106391 | No internet service | 7.8 |
| internetservice | 0.091834 | Fiber optic | 41.7 |
| internetservice | 0.091834 | DSL | 18.8 |
| internetservice | 0.091834 | No | 7.8 |
| paymentmethod | 0.020570 | Electronic check | 44.8 |
| paymentmethod | 0.020570 | Mailed check | 19.6 |
| paymentmethod | 0.020570 | Bank transfer (automatic) | 16.4 |
| paymentmethod | 0.020570 | Credit card (automatic) | 15.8 |
| onlinebackup | 0.013619 | No | 39.9 |
| onlinebackup | 0.013619 | Yes | 21.2 |
| onlinebackup | 0.013619 | No internet service | 7.8 |
| deviceprotection | 0.007952 | No | 38.9 |
| deviceprotection | 0.007952 | Yes | 22.6 |
| deviceprotection | 0.007952 | No internet service | 7.8 |
| streamingtv | 0.006070 | No | 33.0 |
| streamingtv | 0.006070 | Yes | 30.4 |
| streamingtv | 0.006070 | No internet service | 7.8 |
| streamingmovies | 0.005999 | No | 33.2 |
| streamingmovies | 0.005999 | Yes | 30.2 |
| streamingmovies | 0.005999 | No internet service | 7.8 |
Feature importance for numerical features:
| | feature importance |
|---|---|
| monthlycharges | 0.043235 |
| totalcharges | 0.042620 |
| t_m_ratio | 0.125329 |
| tenure | 0.090592 |
# Step-by-step version of the workflow, starting with data preparation.
from churnfli.prepare_data import prepare_data_pipeline
# prepare_data_pipeline() returns a callable; calling that callable yields the
# train and test sets (presumably pandas DataFrames — confirm in the package).
df_train, df_test = prepare_data_pipeline()()
from churnfli.feature_selection import feature_selection
# Per the recorded output that follows, this runs a rough RF-based feature
# selection and reports the features excluded from modelling.
data = feature_selection(df_train, df_test)
##################################################### Perform rough feature selection with a RF model... ##################################################### Mean accuracy on the train data is 0.7459555555555556 These trival features will be excluded from modelling: ['paperlessbilling', 'multiplelines', 'seniorcitizen', 'dependents', 'partner', 'phoneservice', 'gender']
# Train the models. Per the recorded output that follows, this grid-searches
# and saves the Logistic / RF / XGB models, re-trains and saves the "final"
# versions, and reports train/test GINI.
# NOTE(review): the RF GINI recorded for this run (0.701476 / 0.709234) differs
# from the earlier main() run in this notebook (0.700301 / 0.708517), while the
# Logistic and XGB values match — presumably the RF training is not seeded;
# confirm.
from churnfli.main import train_pipeline
# train_pipeline() returns a callable, which is invoked immediately; its
# return value is not kept.
train_pipeline()()
############################################################## ###################### Start to train Logistic models... ############################################################## Performing grid search... Fitting 5 folds for each of 108 candidates, totalling 540 fits Saving model: ./models/v3_Logistic ############################################################## ###################### Start to train RF models... ############################################################## Performing grid search... Fitting 5 folds for each of 27 candidates, totalling 135 fits Saving model: ./models/v3_RF ############################################################## ###################### Start to train XGB models... ############################################################## Performing grid search... Fitting 5 folds for each of 1 candidates, totalling 5 fits [11:51:23] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior. Saving model: ./models/v3_XGB Loading model: models/v3_Logistic Saving model: ./models/final/v3_Logistic_final Loading model: models/v3_RF Saving model: ./models/final/v3_RF_final Loading model: models/v3_XGB [11:51:35] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior. Saving model: ./models/final/v3_XGB_final ##################################################### GINI on the train and test data for the re-trained best models are: #####################################################
| | train | test |
|---|---|---|
| v3_Logistic_final | 0.677458 | 0.711522 |
| v3_RF_final | 0.701476 | 0.709234 |
| v3_XGB_final | 0.686274 | 0.703473 |
# Evaluate the saved final models.
from churnfli.metrics import evaluate_pred, consolidate_pred
# consolidate_pred() loads the three final models (see the "Loading model"
# lines in the recorded output); its result is passed straight to
# evaluate_pred(), which returns two objects.
GINIs, sensi_fig = evaluate_pred(consolidate_pred())
# Show the GINI table (one column per model, rows for test and train in the
# recorded output).
display(GINIs)
# Show the second returned object (presumably a Plotly figure — confirm).
sensi_fig.show()
Loading model: models/final/v3_Logistic_final Loading model: models/final/v3_RF_final Loading model: models/final/v3_XGB_final
| train_test | prob_Logistic | prob_RF | prob_XGB |
|---|---|---|---|
| test | 0.711522 | 0.709234 | 0.703473 |
| train | 0.677458 | 0.701476 | 0.686274 |
# Summarise feature importance and churn rates from the consolidated
# predictions of the final models.
from churnfli.metrics import churn_rate_summary, consolidate_pred
# churn_rate_summary() returns two objects: the categorical summary table and
# a figure for the numerical features.
churn_cat, churn_num_fig = churn_rate_summary(consolidate_pred())
# churn_rate_summary() already displays the categorical table itself (the
# original run showed it twice when display(churn_cat) was also called here),
# so churn_cat is only kept for further use and not displayed again.
churn_num_fig.show()
Loading model: models/final/v3_Logistic_final Loading model: models/final/v3_RF_final Loading model: models/final/v3_XGB_final Loading model: models/final/v3_RF_final Feature importance and churn rates for categorical features:
| | feature importance | value | churn rate (%) |
|---|---|---|---|
| contract | 0.318129 | Month-to-month | 42.7 |
| contract | 0.318129 | One year | 11.1 |
| contract | 0.318129 | Two year | 3.3 |
| onlinesecurity | 0.132654 | No | 41.7 |
| onlinesecurity | 0.132654 | Yes | 14.5 |
| onlinesecurity | 0.132654 | No internet service | 7.8 |
| techsupport | 0.093969 | No | 41.4 |
| techsupport | 0.093969 | Yes | 15.5 |
| techsupport | 0.093969 | No internet service | 7.8 |
| internetservice | 0.091783 | Fiber optic | 41.7 |
| internetservice | 0.091783 | DSL | 18.8 |
| internetservice | 0.091783 | No | 7.8 |
| paymentmethod | 0.023253 | Electronic check | 44.8 |
| paymentmethod | 0.023253 | Mailed check | 19.6 |
| paymentmethod | 0.023253 | Bank transfer (automatic) | 16.4 |
| paymentmethod | 0.023253 | Credit card (automatic) | 15.8 |
| onlinebackup | 0.014069 | No | 39.9 |
| onlinebackup | 0.014069 | Yes | 21.2 |
| onlinebackup | 0.014069 | No internet service | 7.8 |
| streamingmovies | 0.008122 | No | 33.2 |
| streamingmovies | 0.008122 | Yes | 30.2 |
| streamingmovies | 0.008122 | No internet service | 7.8 |
| deviceprotection | 0.007845 | No | 38.9 |
| deviceprotection | 0.007845 | Yes | 22.6 |
| deviceprotection | 0.007845 | No internet service | 7.8 |
| streamingtv | 0.007451 | No | 33.0 |
| streamingtv | 0.007451 | Yes | 30.4 |
| streamingtv | 0.007451 | No internet service | 7.8 |
Feature importance for numerical features:
| | feature importance |
|---|---|
| monthlycharges | 0.045267 |
| totalcharges | 0.040294 |
| t_m_ratio | 0.129151 |
| tenure | 0.088012 |
| | feature importance | value | churn rate (%) |
|---|---|---|---|
| contract | 0.318129 | Month-to-month | 42.7 |
| contract | 0.318129 | One year | 11.1 |
| contract | 0.318129 | Two year | 3.3 |
| onlinesecurity | 0.132654 | No | 41.7 |
| onlinesecurity | 0.132654 | Yes | 14.5 |
| onlinesecurity | 0.132654 | No internet service | 7.8 |
| techsupport | 0.093969 | No | 41.4 |
| techsupport | 0.093969 | Yes | 15.5 |
| techsupport | 0.093969 | No internet service | 7.8 |
| internetservice | 0.091783 | Fiber optic | 41.7 |
| internetservice | 0.091783 | DSL | 18.8 |
| internetservice | 0.091783 | No | 7.8 |
| paymentmethod | 0.023253 | Electronic check | 44.8 |
| paymentmethod | 0.023253 | Mailed check | 19.6 |
| paymentmethod | 0.023253 | Bank transfer (automatic) | 16.4 |
| paymentmethod | 0.023253 | Credit card (automatic) | 15.8 |
| onlinebackup | 0.014069 | No | 39.9 |
| onlinebackup | 0.014069 | Yes | 21.2 |
| onlinebackup | 0.014069 | No internet service | 7.8 |
| streamingmovies | 0.008122 | No | 33.2 |
| streamingmovies | 0.008122 | Yes | 30.2 |
| streamingmovies | 0.008122 | No internet service | 7.8 |
| deviceprotection | 0.007845 | No | 38.9 |
| deviceprotection | 0.007845 | Yes | 22.6 |
| deviceprotection | 0.007845 | No internet service | 7.8 |
| streamingtv | 0.007451 | No | 33.0 |
| streamingtv | 0.007451 | Yes | 30.4 |
| streamingtv | 0.007451 | No internet service | 7.8 |